*-----------------------------------------------------------------
version 10
* Start from a clean session: wipe Mata and the data in memory and close any
* log that is still open (capture keeps the script going when there is
* nothing to clear or close).
capture clear mata
capture clear
capture log close
set more off
cd "E:\REStat_MS14767_Vol96(2)\Data preparation Compustat"
log using "7_links.log", replace
*-----------------------------------------------------------------

* Memory and matrix-size settings used by the rest of this script
set memory 600m
set matsize 2000


* Load the raw RJV panel
use "raw_rjv_panel_america.dta", clear

*************
* here we retrieve some relevant information (SIC4, market shares, HHI) on the firms from Compustat
*************

* Match the panel to the Compustat variables on ticker and year.
* This is the old-style (Stata 10) match merge, so the data in memory are
* sorted on the key first; the using file is presumably already sorted on
* ticker year -- verify if the merge complains.
sort ticker year
merge ticker year using "compustatvars.dta"
tabulate _merge
label variable SIC4 "Firm specific primary SIC4 code"

*******************************************************
*      _merge |      Freq.     Percent        Cum.
* ------------+-----------------------------------
*           1 |     10,989        6.45        6.45
*           2 |    102,433       60.16       66.62
*           3 |     56,832       33.38      100.00
* ------------+-----------------------------------
*       Total |    170,254      100.00
*********************************************************


*************
* drop firms in compustat, which are not in the RJV database
* drop firms in the database for which we don't have MS information
*************

* Only records found in both files survive: this removes the using-only
* records (_merge==2) and the master-only records (_merge==1) in one step.
keep if _merge==3
drop _merge


**************
* discard every observation with ins equal to 0
* (the firm was not yet an insider)
**************

keep if ins!=0

**********************
* from here on only the variables listed below are needed
************************

keep rjvname entryname entityname ticker year SIC4 sic4 MS HHI comnum rjvnum


* Renumber firms as consecutive integers, one per entityname.
* The old id is dropped first so the new one can take its name directly;
* the new variable still ends up last in the variable order.
drop comnum
egen comnum=group(entityname)

* Same for the RJV id, one integer per rjvname.
drop rjvnum
egen rjvnum=group(rjvname)

* Consecutive industry code, used here only to inspect the number of industries
egen SIC4_=group(SIC4)
summarize SIC4_
* the maximum number of industries is 220
summarize year
* the maximum number of years is 14
* the maximum number of years is 14



*********************************************************************************************************************
****
****                     Here we count the links with firms from any industry
****
*********************************************************************************************************************


* Order the panel by year, then firm, then RJV. Everything below (and the
* Mata code that follows) relies on the records of one year being contiguous.
sort year comnum rjvnum

* seq     : position of the record inside its year, in the current row order
* seq_SIC : consecutive group number (one group per year)
* S       : number of records in the year
by year: gen long seq=_n
egen seq_SIC=group(year)
egen S=max(seq), by(year)

summarize S
* In the original run the maximum number of obs. per year was 3639, which is
* the loop bound used below. Stop here if the data ever contain a larger
* year, because its later records would otherwise get no column.
assert r(max) <= 3639


* For every within-year position i, f`i' is filled on the records that sit in
* the same RJV as the i-th record of that year (the i-th record itself is
* excluded) and holds the firm number of the record it is stored on; it is
* missing everywhere else.
* rjvnum[`i'] under -by- is the RJV of the i-th record of the year and is
* missing when the year has fewer than i records, hence the "<." guard.
forvalues i=1/3639 {
	qui by year: gen f`i'=comnum if rjvnum==rjvnum[`i'] & rjvnum[`i']<. & _n!=`i'
}



* Two empty variables that receive the results computed in Mata

gen measure1=.
gen measure2=.


**** here we go into mata

clear mata
mata

// Firm number and group number (one group per year) of every observation
comnum  = st_data(., "comnum")
seq_SIC = st_data(., "seq_SIC")

// View (no copy) onto the columns f1..f3639 built by the preceding loop,
// selected by name so that the variable order of the dataset does not matter.
// Column k is non-missing on the records that share the RJV of the k-th
// record of their group.
fvars = "f" :+ strofreal(1..3639)
st_view(f = ., ., fvars)

// One result per observation, written back by observation number.
// Observations whose group number is missing are never visited and stay missing.
links_dc  = J(st_nobs(), 1, .)
links_ndc = J(st_nobs(), 1, .)

max_SIC = max(seq_SIC)


for (i=1; i<=max_SIC; i++) {

	// observations of group i and the firms present in it
	rowindex    = mm_which(seq_SIC:==i)
	comnum_rel  = comnum[rowindex]
	uniquefirms = uniqrows(comnum_rel)
	nr_unique   = length(uniquefirms)

	// a group containing a single firm has no links: both measures stay 0
	measurevect1 = J(length(rowindex), 1, 0)
	measurevect2 = measurevect1

	if (nr_unique>1) {

		for (j=1; j<=nr_unique; j++) {

			// positions (within the group) of the records of firm j
			firm_position = mm_which(comnum_rel:==uniquefirms[j])
			nr_firm_entry = length(firm_position)

			// for every record of the group, add up the columns that
			// belong to firm j's records (rowsum treats missing as 0);
			// a positive sum marks a record sharing an RJV with firm j
			measurevect = rowsum(f[rowindex, firm_position])
			nonzero_pos = mm_which(measurevect:>0)

			// measure 1: number of such records
			// measure 2: number of distinct sums among them
			measurevect1[firm_position] = J(nr_firm_entry, 1, length(nonzero_pos))
			measurevect2[firm_position] = J(nr_firm_entry, 1, length(uniqrows(measurevect[nonzero_pos])))
		}
	} /* end if */

	links_dc[rowindex]  = measurevect1
	links_ndc[rowindex] = measurevect2
}


st_store(., "measure1", links_dc)
st_store(., "measure2", links_ndc)


end


* Label the two measures first, then give them their final names
label variable measure1 "Total direct links with double counting-RJV"
label variable measure2 "Total direct links without double counting-RJV"
rename measure1 links1_tot
rename measure2 links2_tot





*********************************************************************************************************************
*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry, i.e. direct competitors
****
*********************************************************************************************************************
*********************************************************************************************************************




* Discard the year-level helpers of the previous section
drop seq seq_SIC S

* Records of one year/industry cell must now be contiguous
sort year SIC4 comnum rjvnum

drop f1-f3639

* seq     : position of the record inside its year/SIC4 cell, in row order
* seq_SIC : consecutive group number (one group per year/SIC4 cell)
* S       : number of records in the cell
by year SIC4: gen long seq=_n
egen seq_SIC=group(year SIC4)
egen S=max(seq), by(year SIC4)

summarize S
* In the original run the maximum number of obs. per SIC4/year was 478, which
* is the loop bound used below. Stop here if the data ever contain a larger
* cell, because its later records would otherwise get no column.
assert r(max) <= 478


* For every within-cell position i, f`i' is filled on the records that sit in
* the same RJV as the i-th record of that cell (the i-th record itself is
* excluded) and holds the firm number of the record it is stored on; it is
* missing everywhere else.
* rjvnum[`i'] under -by- is the RJV of the i-th record of the cell and is
* missing when the cell has fewer than i records, hence the "<." guard.
forvalues i=1/478 {
	qui by year SIC4: gen f`i'=comnum if rjvnum==rjvnum[`i'] & rjvnum[`i']<. & _n!=`i'
}



* Two empty variables that receive the results computed in Mata

gen measure1=.
gen measure2=.


**** here we go into mata

* start from an empty Mata workspace, as the other sections of this file do
clear mata
mata

// Firm number and group number (one group per year/SIC4 cell) of every observation
comnum  = st_data(., "comnum")
seq_SIC = st_data(., "seq_SIC")

// Copy of the columns f1..f478 built by the preceding loop, selected by name
// so that the variable order of the dataset does not matter.
// Column k is non-missing on the records that share the RJV of the k-th
// record of their group.
fvars = "f" :+ strofreal(1..478)
f = st_data(., fvars)

// One result per observation, written back by observation number.
// Observations whose group number is missing are never visited and stay missing.
links_dc  = J(st_nobs(), 1, .)
links_ndc = J(st_nobs(), 1, .)

max_SIC = max(seq_SIC)


for (i=1; i<=max_SIC; i++) {

	// observations of group i and the firms present in it
	rowindex    = mm_which(seq_SIC:==i)
	comnum_rel  = comnum[rowindex]
	uniquefirms = uniqrows(comnum_rel)
	nr_unique   = length(uniquefirms)

	// a group containing a single firm has no links: both measures stay 0
	measurevect1 = J(length(rowindex), 1, 0)
	measurevect2 = measurevect1

	if (nr_unique>1) {

		for (j=1; j<=nr_unique; j++) {

			// positions (within the group) of the records of firm j
			firm_position = mm_which(comnum_rel:==uniquefirms[j])
			nr_firm_entry = length(firm_position)

			// for every record of the group, add up the columns that
			// belong to firm j's records (rowsum treats missing as 0);
			// a positive sum marks a record sharing an RJV with firm j
			measurevect = rowsum(f[rowindex, firm_position])
			nonzero_pos = mm_which(measurevect:>0)

			// measure 1: number of such records
			// measure 2: number of distinct sums among them
			measurevect1[firm_position] = J(nr_firm_entry, 1, length(nonzero_pos))
			measurevect2[firm_position] = J(nr_firm_entry, 1, length(uniqrows(measurevect[nonzero_pos])))
		}
	} /* end if */

	links_dc[rowindex]  = measurevect1
	links_ndc[rowindex] = measurevect2
}


st_store(., "measure1", links_dc)
st_store(., "measure2", links_ndc)


end


* Label the two measures first, then give them their final names
label variable measure1 "Direct links with double counting"
label variable measure2 "Direct links without double counting"
rename measure1 links1
rename measure2 links2

*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry
****                     which are in a RJV whose SIC4 code is the same as that of the firm
****
*********************************************************************************************************************

* Records of one year/industry cell must be contiguous for -by- below
sort year SIC4 comnum rjvnum

* Remove the columns of the previous section before rebuilding them
drop f1-f478

* For every within-cell position i, f`i' is filled on the records that sit in
* the same RJV as the i-th record of that year/SIC4 cell (the i-th record
* itself is excluded) AND whose firm code SIC4 equals the code held in sic4;
* it then holds the firm number of the record it is stored on and is missing
* everywhere else.
* rjvnum[`i'] under -by- is the RJV of the i-th record of the cell and is
* missing when the cell has fewer than i records, hence the "<." guard.
forvalues i=1/478 {
	qui by year SIC4: gen f`i'=comnum if rjvnum==rjvnum[`i'] & rjvnum[`i']<. & _n!=`i' & SIC4==sic4
}



* Two empty variables that receive the results computed in Mata

gen measure1=.
gen measure2=.


**** here we go into mata

clear mata
mata

// Firm number and group number (one group per year/SIC4 cell) of every observation
comnum  = st_data(., "comnum")
seq_SIC = st_data(., "seq_SIC")

// Copy of the columns f1..f478 built by the preceding loop, selected by name
// so that the variable order of the dataset does not matter.
// Column k is non-missing on the qualifying records that share the RJV of
// the k-th record of their group.
fvars = "f" :+ strofreal(1..478)
f = st_data(., fvars)

// One result per observation, written back by observation number.
// Observations whose group number is missing are never visited and stay missing.
links_dc  = J(st_nobs(), 1, .)
links_ndc = J(st_nobs(), 1, .)

max_SIC = max(seq_SIC)


for (i=1; i<=max_SIC; i++) {

	// observations of group i and the firms present in it
	rowindex    = mm_which(seq_SIC:==i)
	comnum_rel  = comnum[rowindex]
	uniquefirms = uniqrows(comnum_rel)
	nr_unique   = length(uniquefirms)

	// a group containing a single firm has no links: both measures stay 0
	measurevect1 = J(length(rowindex), 1, 0)
	measurevect2 = measurevect1

	if (nr_unique>1) {

		for (j=1; j<=nr_unique; j++) {

			// positions (within the group) of the records of firm j
			firm_position = mm_which(comnum_rel:==uniquefirms[j])
			nr_firm_entry = length(firm_position)

			// for every record of the group, add up the columns that
			// belong to firm j's records (rowsum treats missing as 0);
			// a positive sum marks a record linked to firm j
			measurevect = rowsum(f[rowindex, firm_position])
			nonzero_pos = mm_which(measurevect:>0)

			// measure 1: number of such records
			// measure 2: number of distinct sums among them
			measurevect1[firm_position] = J(nr_firm_entry, 1, length(nonzero_pos))
			measurevect2[firm_position] = J(nr_firm_entry, 1, length(uniqrows(measurevect[nonzero_pos])))
		}
	} /* end if */

	links_dc[rowindex]  = measurevect1
	links_ndc[rowindex] = measurevect2
}


st_store(., "measure1", links_dc)
st_store(., "measure2", links_ndc)


end


* Label the two measures first, then give them their final names
label variable measure1 "Direct links with double counting-RJV in same SIC"
label variable measure2 "Direct links without double counting-RJV in same SIC"
rename measure1 links1_same
rename measure2 links2_same


*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry
****                     which are in a RJV whose SIC4 code differs from that of the firm
****
*********************************************************************************************************************

* Records of one year/industry cell must be contiguous for -by- below
sort year SIC4 comnum rjvnum

* Remove the columns of the previous section before rebuilding them
drop f1-f478

* For every within-cell position i, f`i' is filled on the records that sit in
* the same RJV as the i-th record of that year/SIC4 cell (the i-th record
* itself is excluded) AND whose firm code SIC4 differs from the code held in
* sic4; it then holds the firm number of the record it is stored on and is
* missing everywhere else.
* rjvnum[`i'] under -by- is the RJV of the i-th record of the cell and is
* missing when the cell has fewer than i records, hence the "<." guard.
forvalues i=1/478 {
	qui by year SIC4: gen f`i'=comnum if rjvnum==rjvnum[`i'] & rjvnum[`i']<. & _n!=`i' & SIC4!=sic4
}



* Two empty variables that receive the results computed in Mata

gen measure1=.
gen measure2=.


**** here we go into mata

clear mata
mata

// Firm number and group number (one group per year/SIC4 cell) of every observation
comnum  = st_data(., "comnum")
seq_SIC = st_data(., "seq_SIC")

// Copy of the columns f1..f478 built by the preceding loop, selected by name
// so that the variable order of the dataset does not matter.
// Column k is non-missing on the qualifying records that share the RJV of
// the k-th record of their group.
fvars = "f" :+ strofreal(1..478)
f = st_data(., fvars)

// One result per observation, written back by observation number.
// Observations whose group number is missing are never visited and stay missing.
links_dc  = J(st_nobs(), 1, .)
links_ndc = J(st_nobs(), 1, .)

max_SIC = max(seq_SIC)


for (i=1; i<=max_SIC; i++) {

	// observations of group i and the firms present in it
	rowindex    = mm_which(seq_SIC:==i)
	comnum_rel  = comnum[rowindex]
	uniquefirms = uniqrows(comnum_rel)
	nr_unique   = length(uniquefirms)

	// a group containing a single firm has no links: both measures stay 0
	measurevect1 = J(length(rowindex), 1, 0)
	measurevect2 = measurevect1

	if (nr_unique>1) {

		for (j=1; j<=nr_unique; j++) {

			// positions (within the group) of the records of firm j
			firm_position = mm_which(comnum_rel:==uniquefirms[j])
			nr_firm_entry = length(firm_position)

			// for every record of the group, add up the columns that
			// belong to firm j's records (rowsum treats missing as 0);
			// a positive sum marks a record linked to firm j
			measurevect = rowsum(f[rowindex, firm_position])
			nonzero_pos = mm_which(measurevect:>0)

			// measure 1: number of such records
			// measure 2: number of distinct sums among them
			measurevect1[firm_position] = J(nr_firm_entry, 1, length(nonzero_pos))
			measurevect2[firm_position] = J(nr_firm_entry, 1, length(uniqrows(measurevect[nonzero_pos])))
		}
	} /* end if */

	links_dc[rowindex]  = measurevect1
	links_ndc[rowindex] = measurevect2
}


st_store(., "measure1", links_dc)
st_store(., "measure2", links_ndc)


end


* These two measures come from the columns built with SIC4!=sic4, i.e. links
* through RJVs whose SIC code differs from the firm's own code, so the labels
* must say "different SIC" (they previously repeated the "same SIC" wording
* of links1_same/links2_same).
rename measure1 links1_diff
label var links1_diff "Direct links with double counting-RJV in different SIC"
rename measure2 links2_diff
label var links2_diff "Direct links without double counting-RJV in different SIC"



* Reduce the data to one record per firm and year: after sorting, every record
* that repeats the firm/year of the record directly above it is flagged,
* counted and removed.
sort comnum year
tempvar repeated
gen byte `repeated' = comnum==comnum[_n-1] & year==year[_n-1]
count if `repeated'
drop if `repeated'

* Keep the identifiers and the eight link measures, ordered by ticker and year
keep  ticker year links1_tot links2_tot links1 links2 links1_same links2_same links1_diff links2_diff
sort ticker year

describe

save links.dta, replace
log close



